


clear all
set more off
set matsize 11000

local gtools "g"  // code uses gtools. if you don't have gtools add "local gtools """ to your else if


*---------------------------------David's folders-------------------
if "`c(username)'"=="da334" | "`c(username)'"=="David" | "`c(username)'"=="Atkin" | "`c(username)'"=="dga" | "`c(username)'"=="atkin" {
global dump "C:/Scratch"
global dropbox "C:/Work/Engel_GFT"
global stataloc "C:/Dropbox/Stata15/StataMP-64"
global codeloc "$dropbox/replication_files/do_files"
global output "$dropbox/replication_files/data/intermediate_data/conventional_price_indices"
}
if "`c(username)'"=="dga" {
global dump "G:/Scratch" 
}
*-------------------------------------------------------------------















* For final publication: "no replacement of negative or missing values"

*Adjusting relative Engel curves for relative price changes

* When estimating P0(y0), adjust t0 Engel curve and use prices (by "i") with t0 weights
* When estimating P1(y1), adjust t1 Engel curve and use prices (by "i") with t1 weights

* only need to do it by decile (1 to 9). 

* Input data:

*1) prices d_ln_p_goods_i_55_43.dta

*2) Engel curves Engel_curves \data\intermediate_data\Engel_curves\Engel_V1
* e.g.:
* Engel_g25_rshare_V1_DM_G108_15_221.dta 
* which is for the rural district numbered 221




**************************************************************************************
**************************************************************************************


local foldno="1"

global working_dir "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'"



* Here set the price elasticity (sigma=0.7 in our baseline calibration)
global sigma_value 0.7

cd $working_dir 


**************************************************************************************
**************************************************************************************


use "$dropbox/replication_files/data/intermediate_data/conventional_price_indices/d_ln_p_goods_i_55_43.dta", clear
tab sector
keep if sector == "Rural"
drop sector
sort state43 district43 good_i

/* Variables in the price data:
sector 
state43 
district43 
dist_wt 
good_i 
sum_demo_shares_*
sum_pluto_shares_*
d_ln_p_pluto_55_43_* 
d_ln_p_demo_55_43_*
*/

/* We should have 407 rural markets, no missing obs:
sum sum_demo_shares_*
sum sum_pluto_shares_*
sum d_ln_p_pluto_55_43_* 
sum d_ln_p_demo_55_43_*
*/

collapse (mean) dist_wt sum_demo_shares_* sum_pluto_shares_* d_ln_p_pluto_55_43_* d_ln_p_demo_55_43_*, by(state43 district43)



sort state43 district43
compress
save delta_prices, replace




*_______________________________________________________________________________________________________________

* Engel curves:




/* variables in the Engel curve data:
r43market_year_id 
market_id 
sector 
state43 
district43 
r43round 
r43count_hh 
r43total_wt 
r43predict_lnmpcew 
percentile 
r43lp_wbwg25_rshare_*
r43se_wbwg25_rshare_*
r55market_year_id 
r55round 
r55count_hh 
r55total_wt 
r55predict_lnmpcew 
r55lp_wbwg25_rshare_*
r55se_wbwg25_rshare_*
*/


*________________________________________________________________________________________________

***LOOP OVER 445 MARKETS DTA FILE***
forvalues m=1(1)445 {
cap use "${dump}/Engel`foldno'/Engel_g25_rshare_V1_DM_G108_15_`m'_bs0.dta", clear
if _rc==0 {
display "Market `m'

drop r43se* r55se*

display "*** CHECK sector is 1:***"
tab sector



qui merge m:1 state43 district43 using delta_prices
qui keep if _merge == 3

display "*** CHECK Nb OF OBS:***"
count 


display " "
gen error=0
forvalues i=1/34 {
**** HERE WRITE IF CONDITION DEPENDING ON WHETHER THERE IS A MISSING:
egen error_r43 = max( (r43lp_wbwg25_rshare_`i' == .) )
egen error_r55 = max( (r55lp_wbwg25_rshare_`i' == .) )
if (error_r43==1 | error_r55==1)  {
replace error=1
display "ERROR!!"
display "ERROR!! MISSING OBS FOR GOOD `i' IN MARKET `m'"
display "ERROR!!"
}
drop error_*
}
*


gen sigma = $sigma_value


***************************************************************************************************************************************
** HERE BEWARE OF POTENTIAL SIGN MISTAKE: + OR - SIG? Dlog prices are 55 prices / 43 prices (double checked, Jul 2019). 
forvalues i=1/34 {
gen r43_prod_pluto_`i' = r43lp_wbwg25_rshare_`i' * exp(-(sigma-1) * d_ln_p_pluto_55_43_`i')
gen r55_prod_pluto_`i' = r55lp_wbwg25_rshare_`i' * exp((sigma-1) * d_ln_p_pluto_55_43_`i')

}
***************************************************************************************************************************************


foreach round in r43 r55 {
foreach type in pluto {

*Sums across G2- foods unprocessed: 
gen `round'_prod_`type'_sumG2 = 0
foreach i in 1 2 3 5 6 9 11 15 17 20 21 22 25 26 27 28 {
replace `round'_prod_`type'_sumG2 = `round'_prod_`type'_sumG2 + `round'_prod_`type'_`i'
}

*Sums across G3, slightly processed: 
gen `round'_prod_`type'_sumG3 = 0
foreach i in 10 12 16 18 19 23 24 29 30 31 32 33 34 {
replace `round'_prod_`type'_sumG3 = `round'_prod_`type'_sumG3 + `round'_prod_`type'_`i'
}

*Sums across G5, light-fuel:
gen `round'_prod_`type'_sumG5 = 0
foreach i in 4 7 8 13 14 {
replace `round'_prod_`type'_sumG5 = `round'_prod_`type'_sumG5 + `round'_prod_`type'_`i'
}

}
*End of loops across types (`type')

* Now: compute different types:
*- demo vs. pluto price indices (baseline: just "pluto")

*Note: hitting the 32-characters limit on variable names...: 
foreach i in 1 2 3 5 6 9 11 15 17 20 21 22 25 26 27 28 {
gen `round'lp_wbwg25_rshare_pluto_`i' = `round'_prod_pluto_`i' / `round'_prod_pluto_sumG2
* initial variable name: `round'lp_wbwg25_rshare_*
}

foreach i in 10 12 16 18 19 23 24 29 30 31 32 33 34 {
gen `round'lp_wbwg25_rshare_pluto_`i' = `round'_prod_pluto_`i' / `round'_prod_pluto_sumG3
}

foreach i in 4 7 8 13 14 {
gen `round'lp_wbwg25_rshare_pluto_`i' = `round'_prod_pluto_`i' / `round'_prod_pluto_sumG5
}

drop `round'_prod_*  
} 
*End of loop across rounds (`round')

drop sum_demo* sum_pluto* d_ln_p* _merge 

if error==0 {
drop error
qui save "${dump}/Engel`foldno'/Engel_adjust_redo_`m'_bs0.dta", replace

}
* End of error statement
}
* End of "if" statement
}
* End of loop across markets (`m')



**/









local bs=0

global share_spec "rshare"   // this is the type of shares in that file that we use, only takes one at a time for now
global band_spec "g25"  // this is bandwidth

*the two adjusted specs (one for P0 and one for P1_
foreach foldno in      "1exactSnrP1"  "1exactSnrP0"  { 

local foldnocut=regexr("`foldno'","exact.*","")

cap mkdir "${dump}/Engel`foldno'/"  // this is just a scratch folder for temp files
capture mkdir "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/" // this is where dtas go at end





foreach zzz in "15"  {   // "15_8i"  "15_136i"
global Gi_spec "V1_DM_G108_`zzz'"   // This is the hh file we use, this calls "$dropbox/replication_files/data/intermediate_data/hh_shares/hh_shares_43_50_55${Gi_spec}_rural.dta" and uses whatever relative shares are in that


*This pulls the number of markets from the data do this, or just loop over 900 (there are 869)
use "$dropbox/replication_files/data/intermediate_data/hh_shares/hh_shares_43_55${Gi_spec}_rural.dta"  , clear  // hh_shares_43_50_55_V2_DMI_rural.dta
sum market_id
local nmarket=r(max)
keep ${share_spec}_*
local prodcount=c(k)
*local nmarket=900








******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************

*Block 1: Run loops over each market estimating Engel curves and then processing them

******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************







****************************************
*1ai) Take adjusted relative Engel curves  and combine with unadjusted Enegel curve 
****************************************



local instances=1  // number of stata windows run simultaneously. Lower this number if you are running into bottlenecks
*local blocksize=round(`nmarket_year'/`instances')
local blocksize=round(`nmarket'/`instances')

local instcount=0 


forval market=1/`nmarket' {


*pull in unadjusted curve files

cap use "${dump}/Engel`foldnocut'/Engel_${band_spec}_${share_spec}_${Gi_spec}_`market'_bs0.dta", clear

if _rc==0 {

if regexm("`foldno'","P1")==1 {
	*bring in new adjusted curves
	drop r55lp_wbw${band_spec}_${share_spec}_*

	if regexm("`foldno'","nr")==1 {
		if regexm("`foldno'","exactS")==1 {
		cap merge 1:1 percentile using "${dump}/Engel`foldnocut'/Engel_adjust_redo_`market'_bs0.dta", keep(match master) keepusing(r55lp_wbw${band_spec}_${share_spec}_pluto_*)
		if _rc==0 {
		local eek=1 
		}
		else {
		local eek=0
		}
		}	
	}



}



if regexm("`foldno'","P0")==1 {

	*bring in new adjusted curves
	drop r43lp_wbw${band_spec}_${share_spec}_*

	if regexm("`foldno'","nr")==1 {
		if regexm("`foldno'","exactS")==1 {
		cap merge 1:1 percentile using "${dump}/Engel`foldnocut'/Engel_adjust_redo_`market'_bs0.dta", keep(match master) keepusing(r43lp_wbw${band_spec}_${share_spec}_pluto_*)
		if _rc==0 {
		local eek=1 
		}
		else {
		local eek=0
		}
		}	

	}


}

cap renvars *lp_wbw${band_spec}_${share_spec}_pluto_*, sub(pluto_ )


if `eek'==1 {
save "${dump}/Engel`foldno'/Engel_${band_spec}_${share_spec}_${Gi_spec}_`market'_bs0.dta", replace
}
}
*from cap




}

**/








****************************************
*1b) now take engel curves and calaculate P0, P1, welfare etc
****************************************



local instances=8  // number of stata windows run simultaneously. Lower this number if you are running into bottlenecks
*local blocksize=round(`nmarket_year'/`instances')
local blocksize=round(`nmarket'/`instances')


local instcount=0 

forval xstart=1(`blocksize')`nmarket' { 
local xfinish=`xstart'+`blocksize'-1
local instcount=`instcount'+1
winexec "${stataloc}" do "${codeloc}/engel_lpoly_analyze_V`foldnocut'_bs.do" `xstart' `xfinish' ${Gi_spec} ${share_spec} ${band_spec}  "${dump}" "${dropbox}" `instcount'  `foldno' `bs'
sleep 10000
}


local counter=0
while `counter'<`instances' {

local counter=0
forval checker=1/`instances' {
	capture confirm file "${dump}/Engel`foldno'/InstanceB_${band_spec}_${share_spec}_${Gi_spec}_`checker'_bs`bs'.dta"
		if _rc==0 {
		local counter=`counter'+1
		}
	}
sleep 10000
}

forval checker=1/`instances' {
erase "${dump}/Engel`foldno'/InstanceB_${band_spec}_${share_spec}_${Gi_spec}_`checker'_bs`bs'.dta"
}
local checker1=`instances'+1
cap erase "${dump}/Engel`foldno'/InstanceB_${band_spec}_${share_spec}_${Gi_spec}_`checker1'_bs`bs'.dta"

**/








******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************

*Block 2: Combine Markets and Create Market-Decile and National-Decile Averages

******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************



****************************************
**2a) First combine results to make dataset 
****************************************





foreach band in   $band_spec       {  
foreach share in  $share_spec  { 
foreach DMI in  $Gi_spec  { 




local shareshort=substr("`share'",1,3)



clear
forval market=1/`nmarket' {
capture confirm file "${dump}/Engel`foldno'/Engel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta"
if _rc==0 {
use "${dump}/Engel`foldno'/Engel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta", clear
cap drop Z* 

cap renvars S*, postsub(x )


cap drop r??predict_lnmpcew 
cap drop r??round 
cap drop r??market_year_id


save "${dump}/Engel`foldno'/XEngel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta", replace
erase "${dump}/Engel`foldno'/Engel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta"
*to clean up the individual engel curves delete except for bs=0 that need to adjust
if "`bs'"!="0" {
erase "${dump}/Engel`foldno'/Engel_`band'_`share'_`DMI'_`market'_bs`bs'.dta"  
}

}
}


clear
forval market=1/`nmarket' {
noi di "append `market'"
cap append using "${dump}/Engel`foldno'/XEngel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta", force
cap erase "${dump}/Engel`foldno'/XEngel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta"
}



egen id=group(market_id percentile)

renvars id market_id sector state43 district43 r43count_hh r43total_wt percentile r55count_hh r55total_wt  avg_count_hh, prefix(X)
cap renvars  r50count_hh r50total_wt , prefix(X)
save "${dump}/Engel`foldno'/XEngel_wide_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", replace




use W* X*  using  "${dump}/Engel`foldno'/XEngel_wide_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", clear
renvars X*, presub(X)

local stublist "" 
	qui ds *_p50
	local stubs "`r(varlist)'"
	tokenize `stubs'
	forval n=1/20000 {
	local `n'=regexr("``n''","_p50$","")
	local stublist `stublist' ``n''
	}

noi di "reshape"


reshape long  `stublist' , i(id) j(item_decile) string
gen decile=regexr(item_decile,"_p","")
destring decile, replace
keep W* market_id decile
egen tag=tag(market_id decile)
keep if tag==1
drop tag
save "${dump}/Engel`foldno'/XEngel_all_rindexes_W_`band'_`share'_`DMI'_bs`bs'.dta", replace


forval g=1/`prodcount' {
use *`shareshort'*_`g'_* X*  using  "${dump}/Engel`foldno'/XEngel_wide_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", clear

renvars X*, presub(X)

local stublist "" 
	qui ds *_`g'_p50
	local stubs "`r(varlist)'"
	tokenize `stubs'
	forval n=1/20000 {
	local `n'=regexr("``n''","_`g'_p50$","")
	local stublist `stublist' ``n''
	}

noi di "reshape"


reshape long  `stublist' , i(id) j(item_decile) string

gen decile=regexr(item_decile,".*_p","")
destring decile, replace
gen item_id=regexr(item_decile,"_p.*","")
replace item_id=regexr(item_id,"^_","")
destring item_id, replace


drop id 

save "${dump}/Engel`foldno'/XEngel_all_rindexes_g`g'_`band'_`share'_`DMI'_bs`bs'.dta", replace
}

clear
forval g=1/`prodcount' {
append using "${dump}/Engel`foldno'/XEngel_all_rindexes_g`g'_`band'_`share'_`DMI'_bs`bs'.dta"
erase "${dump}/Engel`foldno'/XEngel_all_rindexes_g`g'_`band'_`share'_`DMI'_bs`bs'.dta"
}

merge m:1 market_id decile using "${dump}/Engel`foldno'/XEngel_all_rindexes_W_`band'_`share'_`DMI'_bs`bs'.dta", nogenerate

save "${dump}/Engel`foldno'/XEngel_all_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", replace   
erase "${dump}/Engel`foldno'/XEngel_wide_rindexes_`band'_`share'_`DMI'_bs`bs'.dta"
erase "${dump}/Engel`foldno'/XEngel_all_rindexes_W_`band'_`share'_`DMI'_bs`bs'.dta"

}
}
}



**/




****************************************
**2b) Now flag non monotonic curves etc.
****************************************



foreach band in   $band_spec       {  
foreach share in  $share_spec  { 
foreach DMI in  $Gi_spec  { 


local shareshort=substr("`share'",1,3)


use "${dump}/Engel`foldno'/XEngel_all_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", clear 

erase "${dump}/Engel`foldno'/XEngel_all_rindexes_`band'_`share'_`DMI'_bs`bs'.dta"

local starbwlist "" 
qui ds P4355lp_*_`shareshort'*
local stubs "`r(varlist)'"
tokenize `stubs'
forval n=1/20000 {
local `n'=regexr("``n''","^P4355lp_","P*")
local starbwlist `starbwlist' ``n''
}



*exclude non monotonic curves (make this an extra variant)
sort market_id decile item_id percentile
foreach var of varlist `starbwlist'  { 
local namer=regexr("`var'","_`shareshort'","m_`shareshort'")
gen `namer'=`var'  
replace `namer'=. if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)=="0"  // replace value=. if not monotonic
replace `namer'=98 if percentile>9999 & percentile<=99999 & `namer'==. & substr(string(`var'[_n+3]),-1,1)=="0" // reason not monotonic
if regexm("`var'","^P")==1 {
gen m`namer'=1 if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)!="0"  // indicator for monotonicity
}
}


*exclude non monotonic curves when both rounds are monotonic (make this an extra variant)
sort market_id decile item_id percentile
foreach var of varlist `starbwlist'  {  
local namer=regexr("`var'","_`shareshort'","n_`shareshort'")
gen `namer'=`var' if percentile>9999 
replace `namer'=`var' if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)=="1"  &  substr(string(`var'[_n+6]),-4,1)=="1"  // replace value=. if not monotonic
replace `namer'=`var' if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)=="2"  &  substr(string(`var'[_n+6]),-4,1)=="2"  // replace value=. if not monotonic
replace `namer'=98 if percentile>9999 & percentile<=99999 & `namer'==. & (substr(string(`var'[_n+3]),-1,1)=="0" | substr(string(`var'[_n+3]),-4,1)=="0") // reason
replace `namer'=97 if percentile>9999 & percentile<=99999 & `namer'==. & (substr(string(`var'[_n+3]),-1,1)=="1" & substr(string(`var'[_n+3]),-4,1)=="2") // reason
replace `namer'=97 if percentile>9999 & percentile<=99999 & `namer'==. & (substr(string(`var'[_n+3]),-1,1)=="2" & substr(string(`var'[_n+3]),-4,1)=="1") // reason

if regexm("`var'","^P")==1 {
gen n`namer'=1 if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)=="1"  &  substr(string(`var'[_n+6]),-4,1)=="1"  // indicator for monotonicity
replace  n`namer'=1 if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)=="2"  &  substr(string(`var'[_n+6]),-4,1)=="2" 
}

}

egen tag_md=tag(percentile decile market_id)


cap merge m:1 item_id using "$dropbox/replication_files/data/intermediate_data/hh_shares/item_codes`DMI'.dta", keepusing(supergroup*) nogenerate

compress

save "${dump}/Engel`foldno'/temp_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", replace




}
}
}



**/



****************************************
*2c) Now calculate averages at the market_decile level
****************************************



qui {


foreach band in   wbw$band_spec       {  
foreach share in  $share_spec  { 
foreach DMI in  $Gi_spec  {


local xshare "`share'"


use "${dump}/Engel`foldno'/temp_rindexes_${band_spec}_`xshare'_`DMI'_bs`bs'.dta", clear

erase "${dump}/Engel`foldno'/temp_rindexes_${band_spec}_`xshare'_`DMI'_bs`bs'.dta"

`supergroup_drop'





*clean up and keep the cen specification
renvars  *`band'cen_*  , prefix(x)  
drop P*`band'* 

drop T*`band'*
cap drop n*`band'* 
cap drop m*`band'*
renvars x*`band'*, presub("x")




local bwlist "" 
qui ds  P4355lp_`band'*_`xshare' // 
local stubs "`r(varlist)'"
tokenize `stubs'
forval n=1/20000 {
local `n'=regexr("``n''","_`xshare'$","")
local `n'=regexr("``n''","^P4355lp_","")
local bwlist `bwlist' ``n''
}


local xshareshort=substr("`xshare'",1,3)
local xsharelist "" 
local firstbw=word("`bwlist'",-1)


qui ds  nP4355lp_`firstbw'_`xshareshort'* // 
local stubs "`r(varlist)'"
tokenize `stubs'
forval n2=1/20000 {
local `n2'=regexr("``n2''","^nP4355lp_`firstbw'_","")
local xsharelist `xsharelist' ``n2''
}






sort market_id decile item_id percentile
`gtools'egen group=group(decile market_id)  
sum group
local groupmax=r(max)

gen r4355total_wt=r43total_wt + r55total_wt
gen r5543total_wt=r43total_wt + r55total_wt

cap gen r4350total_wt=r43total_wt + r50total_wt
cap gen r5043total_wt=r43total_wt + r50total_wt

cap gen r5550total_wt=r55total_wt + r50total_wt
cap gen r5055total_wt=r55total_wt + r50total_wt

cap gen r555043total_wt=r55total_wt + r50total_wt + r43total_wt 
cap gen r435055total_wt=r55total_wt + r50total_wt + r43total_wt 


noi di "`xsharelist'"

foreach xsh in   `xsharelist'  {  




preserve



foreach xxxsh in   `xsharelist'  {
if "`xxxsh'"!="`xsh'" {
drop *`xxxsh'*
}
}


noi di "making Good_by_good_`xsh'_`band'_v4_`DMI'.dta"

noi di "`bwlist'"

foreach bw in   `bwlist'  {  




noi di "`bw'"




foreach tip in 4355 5543   { 

local nout=regexr("S`tip'lp_`bw'_`xsh'","n_`xsh'","_`xsh'")
cap gen S`tip'lp_`bw'_`xsh'=`nout'

`gtools'egen P`tip'lp_n`bw'_`xsh'=mean(P`tip'lp_`bw'_`xsh') if percentile<=9999,by(percentile decile market_id)

`gtools'egen P`tip'lp_m`bw'_`xsh'=median(P`tip'lp_`bw'_`xsh') if percentile<=9999,by(percentile decile market_id)



**********so now we have the averages across goods for each market


*fix reason for miss entries for n and m that should be missing (when non monotonic)
if regexm("`bw'","m$")==1  {
  replace P`tip'lp_`bw'_`xsh'=.  if percentile>9999 & percentile<=99999   &   mP`tip'lp_`bw'_`xsh'[_n-3]!=1  
}
if regexm("`bw'","n$")==1 {
  replace P`tip'lp_`bw'_`xsh'=.  if percentile>9999 & percentile<=99999   &   nP`tip'lp_`bw'_`xsh'[_n-3]!=1  
}

*this counts the number of non missings (or missings if on percentile>9999
`gtools'egen oP`tip'lp_`bw'_`xsh'=count(P`tip'lp_`bw'_`xsh'),by(percentile decile market_id)



if regexm("`bw'","m$")==1 {
 `gtools'egen xP`tip'lp_`bw'_`xsh'=count(mP`tip'lp_`bw'_`xsh'),by(percentile decile market_id) // just count of monotonic
 gen omP`tip'lp_`bw'_`xsh'=oP`tip'lp_`bw'_`xsh'/xP`tip'lp_`bw'_`xsh'

 gen temp=P`tip'lp_`bw'_`xsh'[_n+3]  if percentile<=9999    &   mP`tip'lp_`bw'_`xsh'==1  
 replace temp=. if temp==99 // if origin is non monotonic we get 99
 `gtools'egen amP`tip'lp_`bw'_`xsh'=mean(temp), by(percentile decile market_id) // degree of overlap above/below
 drop temp
}

if regexm("`bw'","n$")==1 {
 `gtools'egen xP`tip'lp_`bw'_`xsh'=count(nP`tip'lp_`bw'_`xsh'),by(percentile decile market_id) // just count of monotonic
 gen onP`tip'lp_`bw'_`xsh'=oP`tip'lp_`bw'_`xsh'/xP`tip'lp_`bw'_`xsh'

 *use reasons for no overlap when monotonic (above or below)
 gen temp=P`tip'lp_`bw'_`xsh'[_n+3]  if percentile<=9999    &   nP`tip'lp_`bw'_`xsh'==1  
 replace temp=. if temp==99 // these should not exist
 `gtools'egen anP`tip'lp_`bw'_`xsh'=mean(temp), by(percentile decile market_id) // degree of overlap above/below
 drop temp
 
 
 ********************************************************************
 ****bias correction in this clean case (when know if above or below)
 
 
  
 `gtools'egen xmin=min(P`tip'lp_`bw'_`xsh'), by(percentile decile market_id)
 `gtools'egen xmax=max(P`tip'lp_`bw'_`xsh'), by(percentile decile market_id)
 
  gen xrank=P`tip'lp_`bw'_`xsh' if percentile<=9999    &   nP`tip'lp_`bw'_`xsh'==1 
  *now replace missings with large or small value depending which end it is at
  replace xrank=xmin-0.000001 if P`tip'lp_`bw'_`xsh'[_n+3]==1 & percentile<=9999    &   nP`tip'lp_`bw'_`xsh'==1  & P`tip'lp_`bw'_`xsh'==.
  * start going forward: 43-->55. so what is going on here is if we have +1 in [_n+3] it means we are too poor to hit. that means the missing values have the smallest price index changes (and the price index changes we do observe are too large)
  * so give the lowest possible value -999999 
  replace xrank=xmax+0.000001 if P`tip'lp_`bw'_`xsh'[_n+3]==-1 & percentile<=9999    &   nP`tip'lp_`bw'_`xsh'==1  & P`tip'lp_`bw'_`xsh'==.
  *similarly if we have -1 we are too rich to hit, then the observed price changes are smaller than when we miss so want a large possible value. 
  *everything is flipped if going 55-->43 but since price index is negative in that case code is same.
   
 `gtools'egen xmed=median(xrank),by(percentile decile market_id)
 *so now take the median which is estimate of mean of syymetric distribution
 
  gen P`tip'lp_z`bw'_`xsh'=xmed if xmed<=(xmax) & xmed>=(xmin)
  *so this replaces with missing if don't actually observe median
  
 
   *if observe missings at one end or the other, use (min-max)/onP as range and min or max as estimate of end.
   gen P`tip'lp_x`bw'_`xsh'=P`tip'lp_z`bw'_`xsh'
   replace P`tip'lp_x`bw'_`xsh'=xmin+(xmax-xmin)/(onP`tip'lp_`bw'_`xsh'*2) if  P`tip'lp_x`bw'_`xsh'==. & anP`tip'lp_`bw'_`xsh'==-1 // this is median if observe min
   replace P`tip'lp_x`bw'_`xsh'=xmax-(xmax-xmin)/(onP`tip'lp_`bw'_`xsh'*2) if  P`tip'lp_x`bw'_`xsh'==. & anP`tip'lp_`bw'_`xsh'==1 // this is median if observe min
   *this probably deals with most cases. rare when have missing either end and don't estimate median
  
  
   gen P`tip'lp_y`bw'_`xsh'=xmed if xmed<=(xmax+0.0000006) & xmed>=(xmin-0.0000006)
   *now replace median with max/min when even number and max/min in middle (stata's median takes average)
  
   *if observe missings at one end or the other, use (min-max)/onP as range and min or max as estimate of end.
   gen P`tip'lp_w`bw'_`xsh'=P`tip'lp_y`bw'_`xsh'
   replace P`tip'lp_w`bw'_`xsh'=xmin+(xmax-xmin)/(onP`tip'lp_`bw'_`xsh'*2) if  P`tip'lp_w`bw'_`xsh'==. & anP`tip'lp_`bw'_`xsh'==-1 // this is median if observe min
   replace P`tip'lp_w`bw'_`xsh'=xmax-(xmax-xmin)/(onP`tip'lp_`bw'_`xsh'*2) if  P`tip'lp_w`bw'_`xsh'==. & anP`tip'lp_`bw'_`xsh'==1 // this is median if observe min
   *this probably deals with most cases. rare when have missing either end and don't estimate median
 
 
 
 * Sarhan correction when missing median
 gen  xwt=1 if xrank!=.
 gen xwt_x1=xwt
 replace xwt_x1=0 if (xrank>=xmin)  & xrank!=.
 gen xwt_x2=xwt
 replace xwt_x2=0 if (xrank<xmin |  xrank>xmax) & xrank!=.
 gen xwt_x3=xwt
 replace xwt_x3=0 if (xrank<=xmax) & xrank!=.
 
  `gtools'egen xsum1=total( xwt_x1) , by(percentile decile market_id)
  `gtools'egen xsum2=total( xwt_x2) , by(percentile decile market_id)
  `gtools'egen xsum3=total( xwt_x3) , by(percentile decile market_id)
 *this is from  ESTIMATION OF THE MEAN AND STANDARD DEVIATION BY ORDER STATISTICS. PART III, A. E. SARHAN. 
 gen P`tip'lp_u`bw'_`xsh'=0.5*(1/(xsum1+xsum2+xsum3-xsum1-xsum3-1))*((xsum1+xsum2+xsum3-2*xsum3-1)*xmin + (xsum1+xsum2+xsum3-2*xsum1-1)*xmax)
 
 
 gen P`tip'lp_o`bw'_`xsh'=P`tip'lp_z`bw'_`xsh'
 replace P`tip'lp_o`bw'_`xsh'=P`tip'lp_u`bw'_`xsh' if P`tip'lp_z`bw'_`xsh'==.
 
 
 
 drop xwt* xsum*
 
 
 drop xrank xmax xmin xmed




 
}

*no crossing (when looking at 99901) (these are national)
gen temp=P`tip'lp_`bw'_`xsh' 
`gtools'egen o0P`tip'lp_`bw'_`xsh'=count(temp),by(percentile decile)
drop temp

*no crossing and above (when looking at 99901) (these are national)
gen temp=P`tip'lp_`bw'_`xsh' if P`tip'lp_`bw'_`xsh'==-1
`gtools'egen o1P`tip'lp_`bw'_`xsh'=count(temp),by(percentile decile)
drop temp

*no crossing and below (when looking at 99901) (these are national)
gen temp=P`tip'lp_`bw'_`xsh' if P`tip'lp_`bw'_`xsh'==1
`gtools'egen o3P`tip'lp_`bw'_`xsh'=count(temp),by(percentile decile)
drop temp


}




*clean up
foreach tip in 4355 5543  { 
drop P`tip'lp_`bw'_`xsh'
drop nP`tip'lp_`bw'_`xsh'
drop S`tip'lp_`bw'_`xsh'
}


}





foreach pvar of varlist P* {
local wvar=regexr("`pvar'","^P","W")
local rounder=regexr("`pvar'","^P","")
local rounder=regexr("`rounder'","lp.*","")
gen `wvar'=W`rounder'_lnmpcew - `pvar'
}

*these go backwards and I make negative
foreach rounds in  5543   { 
foreach zvar of varlist P`rounds'lp_* W`rounds'lp_* {  
replace `zvar'=-`zvar'
}
}

drop W*_lnmpcew
drop group

drop tag_md
`gtools'egen tag_md=tag(percentile decile market_id)

keep if tag_md==1
*keep if percentile<=9999
*so these are now at tag_md level, dataset of all the changes at each decile.

gen range=substr(string(percentile),-4,4)
gen stat=subinstr(string(percentile),range,"",1)
replace stat="mean" if stat==""
replace stat="no overlap reason" if stat=="9"
replace stat="monotonicity" if stat=="99"
drop percentile


renvars *`xsh', postsub(`xsh' XX )


reshape wide *XX ,  i(decile market_id stat) j(range) string

renvars *XX*, sub(XX )




*now make the long differences

local zbwlist "" 
qui ds  P4355lp_n* 
local stubs "`r(varlist)'"
tokenize `stubs'
forval n=1/20000 {
local `n'=regexr("``n''","^P4355lp_n","")
local zbwlist `zbwlist' ``n''
}



foreach zbw in  `zbwlist'  {

if regexm("`zbw'","m_9")==1 | regexm("`zbw'","n_9")==1  {
local mon=substr("`zbw'",-6,1)
*jsut average the percent that overlap across the two jumps
cap gen o`mon'P435055lp_`zbw'=(o`mon'P4350lp_`zbw'+o`mon'P5055lp_`zbw')/2
cap gen o`mon'P555043lp_`zbw'=(o`mon'P5043lp_`zbw'+o`mon'P5550lp_`zbw')/2
}



}



save "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Good_by_good_${band_spec}_`xsh'_`DMI'_bs`bs'.dta", replace




*now make a smaller clean file:
use "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Good_by_good_${band_spec}_`xsh'_`DMI'_bs`bs'.dta", clear


if ""=="_extrapolate" {
local niner "9900"
local extrape "e"
cap drop *9999 
cap drop *9901
}
else {
local niner "9901"
local extrape "cen"
*first, different specification just showing long changes
}

cap drop *9505 
cap drop *9703
drop ?P*
drop ??P*
keep if stat=="mean"

foreach tip in 4355 5543  {  
gen W`tip'_lnmpcew=W`tip'lp_n`band'`extrape'_`niner'+P`tip'lp_n`band'`extrape'_`niner'
}
save "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Short_${band_spec}_`xsh'_`DMI'_bs`bs'.dta", replace




restore

}




}
}

}


}



**/























***Now national paasche and laspeyres changes
 

foreach share  in $share_spec { 
foreach DMI in $Gi_spec { 
foreach band in $band_spec   { 

local cutoff "100" 
local specT "ce"
local spec2 "o"
local spec "`specT'n"






*merge in conventional CPI price indices from Deaton


*global output "$dropbox/replication_files/data/intermediate_data/conventional_price_indices"
use "$output/CPI_district_X_decile_level_V4", clear
keep if sector=="Rural"

gen dist_wt=((_43_dist_wt*10) +_55_dist_wt)/2

*use filled in CPI as main CPI
drop ?cpi_??_??_decile
rename Lcpi_fill_43_55_decile Lcpi_43_55_decile
rename Pcpi_fill_43_55_decile Pcpi_43_55_decile



merge m:1 sector state43 district43 using  "$output/CPI_district_level_V4" , keepusing(L* P*) keep(match master) nogenerate
rename Lcpi_fill_43_55 Lcpi_demo_43_55 
rename Pcpi_fill_43_55 Pcpi_demo_43_55 



drop sector
gen sector=1
merge m:1 sector state43 district43 using "$dropbox/replication_files/data/intermediate_data/R43R55/market_characteristics_43_55_rural.dta" , keepusing(r??_lmpce_n) keep(match master) nogenerate

gen decile=decile_dist*10

merge 1:1 sector state43 district43 decile using ///
"$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Short_`band'_`share'_`DMI'_bs`bs'.dta" , keepusing(W*_lnmpcew P*lp_`spec2'wbw`band'`spec'_9901 )  keep(match master) nogenerate

gen stat="mean"
merge 1:1 sector state43 district43 decile stat using ///
"$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Good_by_good_`band'_`share'_`DMI'_bs`bs'.dta" , keepusing(xP*lp_wbw`band'`spec'_9901 oP*lp_wbw`band'`spec'_9901)  keep(match master) nogenerate





keep if r43_lmpce_n>=`cutoff'  & r55_lmpce_n>=`cutoff'  




sum Lcpi_43_55_decile if decile_dist==1,d
if r(max)>200 {
winsor Lcpi_43_55_decile, gen(Lcpi_43_55_decile_2) p(.01)
sum Lcpi_43_55_decile_2 if decile_dist==1,d
drop Lcpi_43_55_decile
rename Lcpi_43_55_decile_2 Lcpi_43_55_decile
}












foreach var of varlist W*_lnmpcew P*lp_`spec2'wbw`band'*_9901  { 
gen V`var'=exp(`var')
}

foreach var of varlist Lcpi_* Pcpi_* {
gen Z`var'=log(`var')
}




foreach initial in 43  55 {   
foreach final in 43   55 { 

if  "`initial'`final'"!="5055" & "`initial'`final'"!="5550" {

if `initial'!=`final' {
bysort decile_dist: egen ZY_`initial'_`final'_x = wtmean( W`initial'`final'_lnmpcew ), weight(dist_wt)
gen ZY_`initial'`final'_decile=100*(exp(ZY_`initial'_`final'_x)-1)
}

if `initial'!=`final' {

bysort decile_dist: egen ZE_`initial'_`final'_x = wtmean( P`initial'`final'lp_`spec2'wbw`band'`spec'_9901), weight(dist_wt)  // this is log change

gen ZE_`initial'`final'_decile=100*(exp(ZE_`initial'_`final'_x)-1)

gen WWZE_`initial'`final'_decile=(100*(ZY_`initial'`final'_decile+100)/(ZE_`initial'`final'_decile+100))-100



}


if `initial'<`final' {



bysort decile_dist: egen ZL_`initial'_`final'_x = wtmean( ZLcpi_`initial'_`final'_decile ), weight(dist_wt)
bysort decile_dist: egen ZP_`initial'_`final'_x = wtmean( ZPcpi_`initial'_`final'_decile ), weight(dist_wt)


gen ZL_`initial'`final'_decile=100*(exp(ZL_`initial'_`final'_x)-1)
gen ZP_`initial'`final'_decile=100*(exp(ZP_`initial'_`final'_x)-1)



bysort decile_dist: egen ZLcpi_demo_`initial'_`final'_x = wtmean( ZLcpi_demo_`initial'_`final'), weight(dist_wt)
bysort decile_dist: egen ZPcpi_demo_`initial'_`final'_x = wtmean( ZPcpi_demo_`initial'_`final'), weight(dist_wt)


gen ZL_demo_`initial'`final'_decile=100*(exp(ZLcpi_demo_`initial'_`final'_x)-1)
gen ZP_demo_`initial'`final'_decile=100*(exp(ZPcpi_demo_`initial'_`final'_x)-1)


gen WWZL_`initial'`final'_decile=(100*(ZY_`initial'`final'_decile+100)/(ZL_`initial'`final'_decile+100))-100
gen WWZP_`initial'`final'_decile=(100*(ZY_`initial'`final'_decile+100)/(ZP_`initial'`final'_decile+100))-100


}


}
}
}

egen tag=tag(decile)
keep if tag==1


save "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Estimate_`band'_`share'_`DMI'_bs`bs'.dta", replace






}
}
}
**/

}
}


exit, clear STATA

